home *** CD-ROM | disk | FTP | other *** search
- /*
- is_alike() Determins whether two character strings are
- sufficiently equal using the 'Levenstein distance'
- algorithm.
-
- Limitation: For memory space conservation and execution speed
- this implementation compares the first 20
- characters of both string only. See l_distance.c()
- listing.
- '#define COMP_LEN' may be changed to decrease or
- increase the number of characters compared.
-
- Returns: The constant ACCEPTED or REJECTED.
-
- De-commenting the following '#define DEBUG' will
- create a stand-alone program, which enables you
- to experiment with different values for the
- constants 'addition', 'change' and 'deletion'.
- */
-
- /* #define DEBUG */
-
- #include <stdlib.h>
- #include <string.h>
- #include <math.h>
- #include <ctype.h>
-
- #ifdef DEBUG
- #include <stdio.h>
- #endif
-
- int addition = 1; /* change constants in this block */
- int change = 2; /* of four lines if needed. */
- int deletion = 3;
-
- #define COMP_LEN 20
- #define TU(x) toupper(x)
- #define ARR_SIZE COMP_LEN + 1
- #define SMALLEST_OF(x,y,z) ( (x<y) ? min(x,z) : min(y,z) )
- #define ZERO_IF_EQUAL(x,y) (TU(requested[x-1])==TU(found[y-1]) ? 0 : change)
- #define ACCEPTED 1
- #define REJECTED 0
-
- int distance[ARR_SIZE][ARR_SIZE];
-
- int is_alike(char *requested, char *found)
- {
- register int i,j;
- int r_len, f_len, threshold;
-
- /* first test: compare first character of both strings */
- if (toupper(*requested) != toupper(*found)) {
- #ifdef DEBUG
- printf("\nRejected due to difference in first letters\n");
- #endif
- return(REJECTED); /* different first characters */
- }
-
- r_len = (strlen(requested)>COMP_LEN ? COMP_LEN : strlen(requested));
- f_len = (strlen(found)>COMP_LEN ? COMP_LEN : strlen(found));
-
- /* second test: comparing string length */
- threshold = (int) floor( (double) 1 + ((r_len + 2) / 4));
- if ( abs(r_len - f_len) > threshold) {
- #ifdef DEBUG
- printf("\nRejected due to large length difference\n");
- #endif
- return(REJECTED); /* length difference too big */
- }
-
- distance[0][0] = 0;
- for (j = 1; j <= ARR_SIZE ; j++)
- distance[0][j] = distance[0][j-1] + addition;
- for (j = 1; j <= ARR_SIZE ; j++)
- distance[j][0] = distance[j-1][0] + deletion;
-
- for (i = 1; i <= r_len; i++)
- for (j = 1; j <= f_len; j++)
- distance[i][j] = SMALLEST_OF(
- (distance[i-1][j-1] + ZERO_IF_EQUAL(i,j)),
- (distance[i][j-1] + addition),
- (distance[i-1][j] + deletion) );
- #ifdef DEBUG
- printf("\nis_alike(): threshold %d\n\n",threshold);
- for (i=1; i <= r_len; i++) {
- for (j=1; j <= f_len; j++)
- printf(" %3d",distance[i][j]);
- printf("\n");
- }
- printf("\nis_alike(): l_distance is %d\n",distance[r_len][f_len]);
- #endif
-
- /* third test: Levenstein distance within threshold limit ? */
-
- if ( distance[r_len][f_len] <= threshold )
- return(ACCEPTED); /* distance within limits */
- else
- return(REJECTED); /* distance too big */
- }
-
- #ifdef DEBUG
- void usage()
- {
- printf("\nUsage: IA requested found\n");
- printf("\n requested & found are the two strings to");
- printf("\n be compared with the is_alike() function");
- printf("\n containing the Levenstein distance algorithm.\n\n");
- }
-
- int main(int argc, char *argv[])
- {
- if (argc != 3) {
- usage();
- exit(1);
- } else {
- printf("\nMain(): Comparing '%s' and '%s':\n", argv[1], argv[2]);
- if (is_alike(argv[1], argv[2]))
- printf("\nMain(): Strings sufficiently alike...\n\n");
- else
- printf("\nMain(): Strings differing too much...\n\n");
- }
- return(0);
- }
- #endif
-